import pandas as pd
import plotly.express as px
import numpy as np
import itables
import country_converter as cc
# Load the Human Development Index (HDI) data
hdi = pd.read_csv("data/hdi_human_development_index.csv")

# Reshape from wide to long (every non-id column becomes a "year" row),
# rename "name" to "country", convert the year labels to integers and
# keep only the rows from 2000 onwards.
hdi_long = (
    pd.melt(
        hdi,
        id_vars=["geo", "name"],
        var_name="year",
        value_name="human_development_index",
    )
    .rename(columns={"name": "country"})
    .astype({"year": int})
    .loc[lambda df: df["year"] >= 2000]
)

# Create a subset for 2023.
# .copy() makes this an independent frame, so the column assignment below
# modifies hdi_2023 only and does not trigger pandas' SettingWithCopyWarning
# (the boolean filter alone yields a slice of hdi_long).
hdi_2023 = hdi_long[hdi_long["year"] == 2023].copy()

# Upper-case the geo codes; presumably so they match the ISO-3 codes that the
# choropleth's `locations` argument expects -- confirm against the data.
hdi_2023["geo"] = hdi_2023["geo"].str.upper()

# The ten countries with the highest HDI in 2023, ordered from highest to lowest
hdi_top_10_countries = (
    hdi_2023
    .nlargest(n=10, columns="human_development_index")
    .sort_values("human_development_index", ascending=False)
)

# All rows of the long HDI table that belong to those ten countries
hdi_top_10_data = hdi_long.loc[
    hdi_long["country"].isin(hdi_top_10_countries["country"])
]
# Load the GDP per capita data
gdp_pcap = pd.read_csv("data/gdp_pcap.csv")

# Reshape from wide to long (every non-id column becomes a "year" row),
# rename "name" to "country" and convert the year labels to integers.
# NOTE: unlike the HDI table, no "year >= 2000" filter is applied here;
# every year in the file is kept.
gdp_pcap_long = (
    pd.melt(
        gdp_pcap,
        id_vars=["geo", "name"],
        var_name="year",
        value_name="gdp_per_capita",
    )
    .rename(columns={"name": "country"})
    .astype({"year": int})
)

# Categorise GDP per capita figures as actual historical data or future projection.
# The threshold is determined based on https://www.gapminder.org/data/documentation/gd001/
def categorise_year(y):
    """Return "Actual" for years up to and including 2022, else "Future Projection"."""
    return "Actual" if y <= 2022 else "Future Projection"

# Element-wise wrapper around categorise_year.
# otypes is given explicitly: without it np.vectorize calls the function on
# the first element to infer the output type, which raises a ValueError when
# the input is empty.
categorise_year_vec = np.vectorize(categorise_year, otypes=[str])

# Label every row as "Actual" or "Future Projection" based on its year
gdp_pcap_long["year_category"] = categorise_year_vec(gdp_pcap_long["year"])
# Load the life expectancy data
life_exp = pd.read_csv("data/lex.csv")

# Reshape from wide to long (every non-id column becomes a "year" row),
# rename "name" to "country", convert the year labels to integers and
# keep only the rows from 2000 onwards.
life_exp_long = (
    pd.melt(
        life_exp,
        id_vars=["geo", "name"],
        var_name="year",
        value_name="life_expectancy",
    )
    .rename(columns={"name": "country"})
    .astype({"year": int})
    .loc[lambda df: df["year"] >= 2000]
)
# Data visualization for the first indicator

# Choropleth map of HDI in 2023: countries are located by their "geo" code
# and shaded by their HDI value.
hdi_map = px.choropleth(
    data_frame=hdi_2023,
    locations="geo",
    color="human_development_index",
    hover_name="country",
    color_continuous_scale="Blues",
    title="Map of Countries by Human Development Index in 2023",
)
# Hide the colour scale bar
hdi_map = hdi_map.update_layout(coloraxis_showscale=False)

# Trends of HDI for the ten countries that had the highest HDI in 2023

# Line chart with markers, one line per country
hdi_10_line_chart = px.line(
    data_frame=hdi_top_10_data,
    x="year",
    y="human_development_index",
    color="country",
    markers=True,
    title="Changes in Human Development Index over time among the top ten countries in 2023",
    labels={
        "year": "Year",
        "human_development_index": "Human Development Index",
    },
)

# Footnote explaining what "top ten" means. It is positioned in paper
# coordinates, horizontally centred, with a negative y so it sits below
# the plotting area.
hdi_10_line_chart.add_annotation(
    xref="paper",
    yref="paper",
    x=0.5,
    y=-0.3,
    xanchor="center",
    showarrow=False,
    font={"size": 9},
    text="Top ten countries in 2023: The top ten countries that have the highest Human Development Index in 2023",
)
# Data visualization for the second indicator

# Calculate the average GDP per capita for each year (an unweighted mean over
# the rows of that year), keeping the actual/projection label alongside.
gdp_pcap_avg = (
    gdp_pcap_long
    .groupby(["year", "year_category"])
    .agg(mean_gdp_pcap=pd.NamedAgg(column="gdp_per_capita", aggfunc="mean"))
    .reset_index()
)

# Bar chart of the yearly average, coloured by actual vs. projected years
gdp_pcap_avg_bar = px.bar(
    data_frame=gdp_pcap_avg,
    x="year",
    y="mean_gdp_pcap",
    color="year_category",
    title="Global Average GDP per Capita over time",
    labels={"year": "Year", "mean_gdp_pcap": "Average GDP per Capita"},
)

# Untitled horizontal legend, centred underneath the chart
gdp_pcap_avg_bar.update_layout(
    legend_title_text="",
    legend={
        "orientation": "h",
        "x": 0.5,
        "y": -0.25,
        "xanchor": "center",
        "yanchor": "top",
    },
)

# Per-country mean and standard deviation of GDP per capita over 1990-2022
# (both bounds inclusive), reduced to the ten countries with the largest
# standard deviation, ordered from largest to smallest.
gdp_pcap_std = (
    gdp_pcap_long
    .loc[gdp_pcap_long["year"].between(1990, 2022)]
    .groupby("country")
    .agg(
        mean_gdp_pcap=pd.NamedAgg(column="gdp_per_capita", aggfunc="mean"),
        std_gdp_pcap=pd.NamedAgg(column="gdp_per_capita", aggfunc="std"),
    )
    .nlargest(n=10, columns="std_gdp_pcap")
    .sort_values("std_gdp_pcap", ascending=False)
    .reset_index()
)

# Bar chart of the mean, with the standard deviation drawn as error bars
gdp_pcap_std_bar = px.bar(
    data_frame=gdp_pcap_std,
    x="country",
    y="mean_gdp_pcap",
    error_y="std_gdp_pcap",
    title="Average GDP per capita of the top ten countries with biggest changes based on available data from 1990 to 2022",
    labels={"country": "Country", "mean_gdp_pcap": "Average GDP per Capita"},
)

# Yearly GDP per capita (1990-2022, both bounds inclusive) for the ten
# countries selected in gdp_pcap_std
gdp_std_top_10_data = gdp_pcap_long.loc[
    gdp_pcap_long["country"].isin(gdp_pcap_std["country"])
    & gdp_pcap_long["year"].between(1990, 2022)
]

# Line chart with one line per country
gdp_std_top_10_line = px.line(
    data_frame=gdp_std_top_10_data,
    x="year",
    y="gdp_per_capita",
    color="country",
    title="GDP per capita of the top ten countries with biggest changes based on available data from 1990 to 2022",
    labels={
        "year": "Year",
        "gdp_per_capita": "GDP per Capita",
    },
)

# Give the legend a readable title
gdp_std_top_10_line.update_layout(legend_title_text="Country")
# Data visualization for the relationship between the two indicators

# Prepare data: merge the HDI and GDP per capita tables. The inner join keeps
# only the country/geo/year combinations that are present in both tables.
hdi_gdp_combined = pd.merge(
    hdi_long,
    gdp_pcap_long,
    on=["country", "geo", "year"],
    how="inner",
)

# Animated scatter plot of HDI against GDP per capita, one frame per year.
# The title now describes this chart; it was previously a copy of the GDP
# line chart's title.
hdi_gdp_scatter = px.scatter(
    hdi_gdp_combined,
    x="gdp_per_capita",
    y="human_development_index",
    hover_name="country",
    animation_frame="year",
    labels={
        "human_development_index": "Human Development Index",
        "gdp_per_capita": "GDP per Capita",
        "year": "Year",
    },
    title="Human Development Index versus GDP per capita over time",
    template="simple_white",
)
# Display the HDI choropleth map built above
hdi_map
Insights for page 1

Provide a description here.

# Display the HDI trend line chart for the top ten countries
hdi_10_line_chart
# Show the 2023 top-ten table through itables
itables.show(hdi_top_10_countries)
Loading ITables v2.6.1 from the internet... (need help?)